{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 树回归与标准回归的比较"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**说明:**\n",
    "\n",
    "将 `bikeSpeedVsIq_train.txt` 和 `bikeSpeedVsIq_test.txt` 放在当前目录下。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 回归树的叶节点生成函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def regLeaf(dataSet):\n",
    "    \"\"\"Return the value stored in a regression-tree leaf.\n",
    "\n",
    "    The value is the mean of the last column of dataSet, which the rest of\n",
    "    this notebook treats as the target column.\n",
    "    \"\"\"\n",
    "    targets = dataSet[:,-1]\n",
    "    return mean(targets)\n",
    "\n",
    "def regErr(dataSet):\n",
    "    \"\"\"Return the total squared error of the target column about its mean.\n",
    "\n",
    "    Computed as the variance of the last column multiplied by the number of\n",
    "    rows in dataSet.\n",
    "    \"\"\"\n",
    "    spread = var(dataSet[:,-1])\n",
    "    numRows = shape(dataSet)[0]\n",
    "    return spread * numRows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型树的叶节点生成函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def linearSolve(dataSet):\n",
    "    \"\"\"Fit an ordinary-least-squares linear model to dataSet.\n",
    "\n",
    "    The last column of dataSet is the target and every other column is a\n",
    "    feature. A column of ones is placed in front of the features so the first\n",
    "    coefficient acts as the intercept.\n",
    "\n",
    "    dataSet is expected to be a NumPy matrix: the target slice has to stay\n",
    "    two-dimensional for the matrix products below to line up.\n",
    "\n",
    "    Returns (ws, X, Y): the coefficient column vector, the design matrix with\n",
    "    its leading ones column, and the target column.\n",
    "\n",
    "    Raises NameError when the determinant of X.T*X is exactly zero.\n",
    "    \"\"\"\n",
    "    m,n = shape(dataSet)\n",
    "    X = asmatrix(ones((m,n)))      # column 0 stays 1.0 (intercept term)\n",
    "    X[:,1:n] = dataSet[:,0:n-1]    # remaining columns hold the features\n",
    "    Y = dataSet[:,-1]              # target column\n",
    "    xTx = X.T*X\n",
    "    # Only an exactly-zero determinant is rejected; a nearly singular matrix\n",
    "    # is still inverted.\n",
    "    if linalg.det(xTx) == 0.0:\n",
    "        raise NameError('This matrix is singular, cannot do inverse,\\n'\n",
    "                        'try increasing the second value of ops')\n",
    "    ws = xTx.I * (X.T * Y)\n",
    "    return ws,X,Y\n",
    "\n",
    "def modelLeaf(dataSet):\n",
    "    \"\"\"Return the value stored in a model-tree leaf.\n",
    "\n",
    "    The leaf holds the linear-regression coefficients that linearSolve fits\n",
    "    to dataSet; the design matrix and targets it also returns are discarded.\n",
    "    \"\"\"\n",
    "    return linearSolve(dataSet)[0]\n",
    "\n",
    "def modelErr(dataSet):\n",
    "    \"\"\"Return the sum of squared residuals of a linear fit to dataSet.\n",
    "\n",
    "    The fit comes from linearSolve; the residual is the difference between\n",
    "    the target column and the fitted values.\n",
    "    \"\"\"\n",
    "    coeffs, features, targets = linearSolve(dataSet)\n",
    "    residuals = targets - features * coeffs\n",
    "    return sum(power(residuals, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 树的生成函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def binSplitDataSet(dataSet, feature, value):\n",
    "    \"\"\"Split dataSet into two row subsets on one feature.\n",
    "\n",
    "    Returns (mat0, mat1): mat0 holds the rows whose value in column `feature`\n",
    "    is strictly greater than `value`, mat1 holds the rows where it is less\n",
    "    than or equal to `value`.\n",
    "    \"\"\"\n",
    "    column = dataSet[:,feature]\n",
    "    aboveRows = nonzero(column > value)[0]\n",
    "    belowRows = nonzero(column <= value)[0]\n",
    "    return dataSet[aboveRows,:], dataSet[belowRows,:]\n",
    "\n",
    "def chooseBestSplit(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):\n",
    "    \"\"\"Find the binary split of dataSet with the lowest total error.\n",
    "\n",
    "    leafType builds a leaf value from a data set and errType scores a data\n",
    "    set. ops[0] is the minimum error reduction a split must achieve and\n",
    "    ops[1] is the minimum number of rows allowed on each side of a split.\n",
    "\n",
    "    Returns (featureIndex, splitValue) for the chosen split, or\n",
    "    (None, leafType(dataSet)) when one of the three stop conditions is hit.\n",
    "    \"\"\"\n",
    "    minGain = ops[0]\n",
    "    minRows = ops[1]\n",
    "    # stop condition 1: every target value is the same, nothing to split\n",
    "    targetVals = dataSet[:,-1].T.tolist()[0]\n",
    "    if len(set(targetVals)) == 1:\n",
    "        return None, leafType(dataSet)\n",
    "    numRows, numCols = shape(dataSet)\n",
    "    baseErr = errType(dataSet)\n",
    "    lowestErr, chosenFeat, chosenVal = inf, 0, 0\n",
    "    for col in range(numCols-1):    # the last column is the target, never a split feature\n",
    "        candidates = set(dataSet[:,col].T.tolist()[0])\n",
    "        for cut in candidates:\n",
    "            above, below = binSplitDataSet(dataSet, col, cut)\n",
    "            bigEnough = (shape(above)[0] >= minRows) and (shape(below)[0] >= minRows)\n",
    "            if bigEnough:\n",
    "                splitErr = errType(above) + errType(below)\n",
    "                if splitErr < lowestErr:    # strict: the first of equal candidates wins\n",
    "                    lowestErr, chosenFeat, chosenVal = splitErr, col, cut\n",
    "    # stop condition 2: the best split does not reduce the error enough\n",
    "    if (baseErr - lowestErr) < minGain:\n",
    "        return None, leafType(dataSet)\n",
    "    above, below = binSplitDataSet(dataSet, chosenFeat, chosenVal)\n",
    "    # stop condition 3: the chosen split leaves too few rows on one side\n",
    "    if (shape(above)[0] < minRows) or (shape(below)[0] < minRows):\n",
    "        return None, leafType(dataSet)\n",
    "    return chosenFeat, chosenVal\n",
    "\n",
    "def createTree(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):\n",
    "    \"\"\"Recursively build a binary tree from dataSet.\n",
    "\n",
    "    dataSet is assumed to be a NumPy matrix so that array filtering works.\n",
    "    leafType, errType and ops are passed unchanged to chooseBestSplit.\n",
    "\n",
    "    Returns a leaf value (whatever leafType produced) when chooseBestSplit\n",
    "    hits a stop condition, otherwise a dict with keys 'spInd' (feature index),\n",
    "    'spVal' (split value), 'left' (rows above spVal) and 'right' (the rest),\n",
    "    where both subtrees are built the same way.\n",
    "    \"\"\"\n",
    "    feat, val = chooseBestSplit(dataSet, leafType, errType, ops)\n",
    "    # identity test: feature index 0 is a valid split and must not be\n",
    "    # confused with the None that signals a leaf\n",
    "    if feat is None: return val\n",
    "    lSet, rSet = binSplitDataSet(dataSet, feat, val)\n",
    "    return {\n",
    "        'spInd': feat,\n",
    "        'spVal': val,\n",
    "        'left': createTree(lSet, leafType, errType, ops),\n",
    "        'right': createTree(rSet, leafType, errType, ops),\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 用树回归进行预测的代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def isTree(obj):\n",
    "    \"\"\"Return True when obj is an internal tree node (a dict), False for a leaf.\n",
    "\n",
    "    isinstance is used so that dict subclasses also count as tree nodes.\n",
    "    \"\"\"\n",
    "    return isinstance(obj, dict)\n",
    "\n",
    "def regTreeEval(model, inDat):\n",
    "    \"\"\"Evaluate a regression-tree leaf: the prediction is the stored constant.\n",
    "\n",
    "    inDat is not used; it is accepted so the signature matches modelTreeEval.\n",
    "    \"\"\"\n",
    "    prediction = float(model)\n",
    "    return prediction\n",
    "\n",
    "def modelTreeEval(model, inDat):\n",
    "    \"\"\"Evaluate a model-tree leaf: apply the stored linear model to one sample.\n",
    "\n",
    "    inDat is a single-row, two-dimensional sample (its column count is read\n",
    "    from shape(inDat)[1]). A leading 1.0 is prepended for the intercept and\n",
    "    the row is multiplied by the coefficient column `model`.\n",
    "\n",
    "    Returns the prediction as a Python float.\n",
    "    \"\"\"\n",
    "    n = shape(inDat)[1]\n",
    "    X = asmatrix(ones((1,n+1)))    # X[0,0] stays 1.0 (intercept term)\n",
    "    X[:,1:n+1] = inDat\n",
    "    # .item() extracts the single element explicitly (and still fails if the\n",
    "    # product is not 1x1) instead of relying on float() of a 2-D matrix\n",
    "    return float((X*model).item())\n",
    "\n",
    "def treeForeCast(tree, inData, modelEval=regTreeEval):\n",
    "    \"\"\"Predict the target for one sample by walking tree down to a leaf.\n",
    "\n",
    "    tree is either a leaf value or a dict node as built by createTree.\n",
    "    inData is one sample, either a 1xn matrix (what createForeCast passes) or\n",
    "    a flat sequence of feature values. modelEval(leaf, inData) turns the\n",
    "    reached leaf into the returned prediction.\n",
    "    \"\"\"\n",
    "    if not isTree(tree): return modelEval(tree, inData)\n",
    "    # Flatten before indexing: inData[k] on a 1xn matrix selects ROW k, which\n",
    "    # raises IndexError for any split feature other than column 0.\n",
    "    featVal = asarray(inData).ravel()[tree['spInd']]\n",
    "    # same convention as binSplitDataSet: values above spVal go left\n",
    "    branch = tree['left'] if featVal > tree['spVal'] else tree['right']\n",
    "    # the recursive call evaluates the leaf itself once branch is not a dict\n",
    "    return treeForeCast(branch, inData, modelEval)\n",
    "        \n",
    "def createForeCast(tree, testData, modelEval=regTreeEval):\n",
    "    \"\"\"Predict the target for every row of testData.\n",
    "\n",
    "    Each row is converted to a matrix and passed to treeForeCast together\n",
    "    with tree and modelEval.\n",
    "\n",
    "    Returns an (m, 1) matrix of predictions, where m is len(testData).\n",
    "    \"\"\"\n",
    "    m = len(testData)\n",
    "    # asmatrix is the function `mat` used to alias; `mat` itself is no longer\n",
    "    # available in NumPy 2.x\n",
    "    yHat = asmatrix(zeros((m,1)))\n",
    "    for i in range(m):\n",
    "        yHat[i,0] = treeForeCast(tree, asmatrix(testData[i]), modelEval)\n",
    "    return yHat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def loadDataSet(fileName):\n",
    "    \"\"\"Parse a file of tab-delimited floats into a list of rows.\n",
    "\n",
    "    Each non-blank line becomes one list of floats. Callers in this notebook\n",
    "    assume the last column is the target value. Blank lines (for example a\n",
    "    trailing empty line) are skipped.\n",
    "\n",
    "    Raises ValueError when a field cannot be converted to float.\n",
    "    \"\"\"\n",
    "    dataMat = []\n",
    "    with open(fileName) as fr:    # closes the file even if parsing fails\n",
    "        for line in fr:\n",
    "            stripped = line.strip()\n",
    "            if not stripped:\n",
    "                continue\n",
    "            dataMat.append([float(field) for field in stripped.split('\\t')])\n",
    "    return dataMat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load both data sets as NumPy matrices (the tree code relies on matrix slicing).\n",
    "# asmatrix is the function `mat` used to alias; `mat` itself is no longer\n",
    "# available in NumPy 2.x.\n",
    "trainMat = asmatrix(loadDataSet('bikeSpeedVsIq_train.txt'))\n",
    "testMat = asmatrix(loadDataSet('bikeSpeedVsIq_test.txt'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X+U3XWd3/HnO8OoE7tmYAkKkwxB\nTzauECBkFtPSbvnRGlCEWZE9cECzLm3aLd3uunZWWDkNnIMru3HXted06YlCwaMb5JdjqLToASwt\nNeiEEGPUVEQgmbAmNoRuJZVh8u4f93vjzOT7vfd+P987n3u/3/t6nOPJzOfeO/fDnfF9v/f9eX/e\nH3N3RESkuhZ0egIiIjK/FOhFRCpOgV5EpOIU6EVEKk6BXkSk4hToRUQqToFeRKTiFOhFRCpOgV5E\npOKO6/QEAE488URftmxZp6chIlIq27Zt+5m7L252v64I9MuWLWNiYqLT0xARKRUze6GV+yl1IyJS\ncQr0IiIVp0AvIlJxCvQiIhWnQC8iUnFdUXUjIlJW49sn2fjIbvYdOswpgwOMrV3B6KqhTk9rFgV6\nEamkGAF4fPskNz64k8NT0wBMHjrMjQ/uBOiqYK9ALyKVEysAb3xk99HnqDs8Nc3GR3Y3fZ6YnwSU\noxeRymkUgNtp36HDucbr6m9Ek4cO4/zyjWh8+2Rb51enQC8ilRMagPMaXNifa7wu1htRnQK9iFTO\nKYMDucZDuecbr5vMeMPJGi+qaaA3szvNbL+ZfW/G2NlmttXMnjGzCTM7Nxk3M/v3ZvasmX3XzM6Z\nl1mLiDQwtnYFA/19s8YG+vsYW7uirc/zyuGpXON1fWa5xotq5Yr+LuDiOWN/Dtzi7mcD/y75HuAS\nYHnyv/XA7e2ZpohI60ZXDfGpD6xkaHAAA4YGB/jUB1a2fbEz9JPDdMYlf9Z4UU2rbtz9CTNbNncY\neEvy9SJgX/L15cAX3N2BrWY2aGYnu/tLbZqviEhLRlcNzXuJ49jaFbOqe6C1Tw5DgwOpaZqhNqeW\n6kJz9H8IbDSzPcCngRuT8SFgz4z77U3GjmFm65O0z8SBAwcCpyEi0jmjq4a4YvXQ0ZRLnxlXrG7+\nBnPBO9NbyGeNFxUa6H8P+Ki7LwU+CtyRjKclmFI/i7j7JncfcfeRxYvn5z9ORGQ+jW+f5IFtk0dT\nLtPuPLBtsmmZ5OM/TL+4zRovKjTQrwMeTL6+Dzg3+XovsHTG/Zbwy7SOiEilhJZJxir/rAsN9PuA\nf5x8fSHwo+TrLcCHk+qbNcArys+LSFWFBuxY5Z91rZRXbga+Bawws71mdh3wz4G/MLMdwJ9Sq7AB\neBh4DngW+Bzwr+Zl1iIiXSA0YMcq/6xrperm6oybVqfc14Hri05KRKSoGL1kxtauYOz+HUxN/3Ip\nsr/Pmgbs+jxi9bpRUzMRqZzQpmZBbw5zy01aLIWPUf5ZpxYIIlI5IYukIY3GNj6ym6kjsyP71BGf\nt541oXRFLyJdL++VdsgiaUjL4djVM6F0RS8iXS3kSjtkkTQkaMeungmlQC8iXS0kDRNS1RIStMfW\nrqB/wex9ov0Lmi/GQu0N7LzbHuO0G77Gebc9Nm+96EGBXkS6XMiVdkhTs7G1K+jvmxO0W6igOaYf\nQAsNKGMfPKIcvYh0tVMyGoA1S48EVbXkrKDZ+MjuWaWVAFPT3vQowSJHEIbQFb2IdLVYm4tCKmhC\nF2NjL+Lqil5EospbQRNrc1HIqU+hnzZCHxdKV/QiEk3s3PR8C/20EbsFggK9iEQTayNTLKOrhjhn\neNGssXOGFzX9tBHrBKw6pW5EJJpYG5liuWl8J0/++OCssSd/fJCbxndy6+jKho+N2QJBgV5EgoT0\nhQnJTYcuXN40vpPNT+1h2p0+M65+99
KmwTevzU/tyRxv93MVodSNiOQWmk4JyU0PLuzPNQ61IP/F\nrS/OOvnpi1tf5KbxndnPM5DxPBnj9Z+bZ7xTFOhFJLfQk5VCctpZMbNRLG10pZ3FMjY6ZY0DR8+K\nbXW8U1o5eOROM9tvZt+bM/77ZrbbzHaZ2Z/PGL/RzJ5Nbls7H5MWkc4qkk7JymlnOXR4Ktc4hF1p\nH3o143kyxgHWvP34XOOd0soV/V3AxTMHzOwC4HLgTHc/Hfh0Mv4u4Crg9OQxf21msz+niUjphTbz\nCrnSjnXVHPLf9P2X/i7X+Exd1evG3Z8ADs4Z/j3gNnf/RXKf/cn45cA97v4Ld/8JtSMFz0VEulre\noBNaBx5ypR0rDx7y3/RyxtV+1nhd7JLR0Bz9rwH/yMyeMrP/Zma/kYwPATPfmvcmYyLSpUKCTmgd\neMjV+VDGFXXWeOhjYta2h65xhAotrzwOOB5YA/wGcK+ZvZ30vm2pb7tmtp7kUPHh4eHAaYhIUaF1\n6iF14Fe/eylf3Ppi6niWsbUrGLtvx6w+NM1aAV/wzsWpz3PBOxc3nF/e/6bBgf7UtYJGlToQv9dN\n6BX9XuBBr/k2cAQ4MRmf+RtbAuxL+wHuvsndR9x9ZPHixi++iMyfmEHn1tGVXLtm+OgVfJ8Z164Z\nbl5znrMV8OM/PJBrPNSlZ52ca7wu9oEloYF+HLgQwMx+DXgD8DNgC3CVmb3RzE4DlgPfbsdERWR+\nhNSpFzFy6gm8bdGbMOBti97EyKknNLx/o1bAWWK9eYW+ocTuddM0dWNmm4HzgRPNbC+wAbgTuDMp\nuXwNWOfuDuwys3uB7wOvA9e7+3T6TxaRbhBSpx6qvh5QTxXV1wOAzJRJzK6SeYXMDeJ15KxrGujd\n/eqMm67NuP8ngU8WmZSIhMvbmuCVjHr0rPEiQtYD+sxSK2waLeCOrV0x6w0F5ueKOWRudTF73Whn\nrEiFxDpIO1RISiWkvHJ01RBXrB6atRZwxer2B1a1QBCR6GIdpA21Xa7vuPFhlt3wNd5x48MNd7fW\nhawHHJ9xW9Y41N7wHtg2OavXzQPbJttepx5SxtkJCvQiFRLrIO2QpmEQth4Q8phYdeqxF1VDqU2x\nSIXEOkg7tD1vyHpAyGNCF0nzir2oGkqBXqRCQjcK5V3ADc1Nh7wRhTymyCJpXjEXVUMpdSNSISF1\n3SELuKGNxkJSHSGPKcsiaSwK9CIV0u6j+rJktSxo1MoAwtYDQh5TlkXSWJS6EamQWEf11fPwIUf1\nxUh1xKqjLwsFepEuljd3HpKjD13AvXV0ZZRzUce3T85qajZ56DBj9+0AsnfTlmWRNBYFepEuFRLg\nQnL03X71e/OWXbM6VwJMHXFu3rKr7d01q0o5epEu1SjAZYlVRx9TyFGCMpuu6EW6VEiAi1VHD/nT\nSrF1+/xi0hW9SIVk5eKb1dHnFfMovNAWCDGP6ut2CvQiXWpBRkl61jjEO3Aj5lF4G95/Ov19s/+j\n+/uMDe8/vSvmVwZK3Yh0qSMZe3uyxiHegRsxT6UKqaCJfVRft2vl4JE7gUuB/e5+xpzb/i2wEVjs\n7j8zMwM+C7wXeBX4HXd/uv3TFqm+oYx8e6NNP7EO3Bhc2M/Lr6aclTpPp1LlXUMIfR2qmtdvJXVz\nF3Dx3EEzWwr8U2Bm0e4l1I4PXE7t4O/bi09RpDeNrV1B/5w8TbNDscfWrkhNc7S7VDLmqVQhQtom\nVDmv3zTQu/sTwMGUmz4D/DEw81d7OfCF5NDwrcCgmTU+JVdEMh1p8n2a6Tnnq879vh1inkoVIqRk\ntMp5/aAcvZldBky6+w6b3cRoCJjZv3RvMvZS8AxFetQtD+1iek5CfvqIc8tD2RuFbt6yK/XNodnm\norwWDfSnlnkuGmieuomVHsmb7qlyXj93oDezhcAngPek3Zwylno5YWbrqaV3GB4ezjsNkVIJCW5p\nOf
BG4xC+uSjv/Kam0z9bZI3PfJ68h4PHEmt9oxNCyivfAZwG7DCz54ElwNNm9jZqV/Az29ctAfal\n/RB33+TuI+4+snhxe2t8RbpJt+d+Q+b389emc43XhaZHxrdPct5tj3HaDV/jvNsem5fXriynRYXI\nHejdfae7n+Tuy9x9GbXgfo67/y2wBfiw1awBXnF3pW2kp4UGt8GMNEjWOIRtLoqZmw5Jj8R6o+z2\nVhBFtFJeuRk4HzjRzPYCG9z9joy7P0yttPJZauWVH2nTPEVKKzT3e/Nlp89qaga1qpubL8veKPS+\nM09O7V75vjOzayJi5qZDyjIbvRG1OwhXtRFa00Dv7lc3uX3ZjK8duL74tESqo0j/Gci3UShkZ2zM\n3HRIWWaVF0ljUQsEkXkWM/cbEhRjzi+kLDPrDacKi6SxKNCLzLPQ3G9IbjokKIbML6vdTrOjt0Pm\nV+VF0ljU60Ykp5BSyZDcb0huOuSEqZD5XbNmOPV5rlnTuFQ65JATnRZVnAK9SA4x68BD0jCxuleG\nnhkbGrSrukgaiwK9SA4xK0BiHfQdKvTMWAXt+JSjF8khLfA2Gi8ipKmZFi4ljQK99LS8Oy77LH25\nMWu8sLk/tsnTaOFS0ijQS88KqWqZzij4zhovYuMju5ma03lyatob7lit8u5OCaccvfSskHx7yGEg\noULz7VU86FuK0RW99KzQzUV58+ahQvPtedNR3d50TYpToJeeFbxwmTNvHirWKUlVPnBDahTopWeF\nBNKQvHmoWKckqZdM9SlHLz0rZPNOzPLK+hzn+5SkKh+4ITUK9NLT8gbSPrPUCpt5K6/MKaQNcEhb\nAtACbpko0IvkELO8MkRIG+CQTzbdfCSgHEuBXiSHmOWVIULPjM37ySZmKwgprulirJndaWb7zex7\nM8Y2mtkPzey7ZvYVMxuccduNZvasme02s7XzNXGRTsjqAtmsO2QssXbuagG3XFqpurkLuHjO2DeA\nM9z9TOB/ATcCmNm7gKuA05PH/LWZ9SESQYwDpEO7Q8aYG8RLLamnTrk0DfTu/gRwcM7Y19399eTb\nrcCS5OvLgXvc/Rfu/hNqZ8ee28b5iqSKteknpOom5oakrBRSu1NL6qlTLu2oo/9d4L8kXw8Be2bc\ntjcZO4aZrTezCTObOHCgvb2ypffE2vQTkhqJuSEpVgBWT51yKbQYa2afAF4HvlQfSrlb6mdGd98E\nbAIYGRnpjpIFKa3QnHHeEsGQ1EjMfHbM05jUV748ggO9ma0DLgUucj/6V74XWDrjbkuAfeHTE2lN\nyKafkBLBkKqb2BuSFIBlrqDUjZldDHwcuMzdX51x0xbgKjN7o5mdBiwHvl18miKNhbYzyJtSCXke\n5bOl05pe0ZvZZuB84EQz2wtsoFZl80bgG1bLTW5193/p7rvM7F7g+9RSOte7+3T6TxZpn5CURUhK\nZXTVEBMvHJx1VuoVqxtfQetwa+k08y7Y0TcyMuITExOdnoZ0kRjb68++5eupG4kGB/p5ZsN7MueV\n1i5AC5HSCWa2zd1Hmt1PO2Ol68TaXp9VKNNob1HojlD1hZFOUpti6TqxyhEPpTT/ajQOYekeHewh\nnaZAL12nSKlknt2nIbs7Qx6jgz2k0xTopeuEBNOQq+ZYFTTqCyOdpkAvXSdWqWTI7s6Qx6gvjHSa\nFmOl68QqlSwyvzwLqaEHe4i0iwK9dKW8wXTRQH9qqeSigeyTlWJV96iOXjpNgV4qIWapZAi1JZBO\nUqCXSohVKhmb6u+lHRTopSvlDXAhjcNiNxvLS+eySruo6ka6zvj2Scbu3zGrVHLs/h1tL5UMPRYw\n1mlRqr+XdlGgl65zy0O7mJqe3YNpatq55aFdmY8JKXsMORYw5i7XMqSWpByUupGu83JGXj1rvC7v\ngmdIII25gNvtqSUpD13RS88K2cgU8ypbfeylXRTopesMZtS+Z42H
CsnRx9zlqnNZpV1aOXjkTmpH\nBu539zOSsROALwPLgOeB33b3l612CslngfcCrwK/4+5Pz8/UpapOP+VXePLHB1PH2ykkRx97l6vq\n76UdWrmivwu4eM7YDcCj7r4ceDT5HuASascHLgfWA7e3Z5rSS/7nc8cG+UbjoUJPmNJVtpRN0yt6\nd3/CzJbNGb6c2vGCAHcD36R2huzlwBeSw8K3mtmgmZ3s7i+1a8JSfVmHnrX7MLTQxU5dZUvZhObo\n31oP3sm/JyXjQ8CeGffbm4yJdB0tdkqvaHd5ZVpnkdTrMDNbTy29w/DwcJunIdKcmo1JrwgN9D+t\np2TM7GRgfzK+F1g6435LgH1pP8DdNwGboHY4eOA8RApRGkZ6QWjqZguwLvl6HfDVGeMftpo1wCvK\nz/e2WO0CRCRbK+WVm6ktvJ5oZnuBDcBtwL1mdh3wInBlcveHqZVWPkutvPIj8zBnKYmqNuVSR0kp\nm1aqbq7OuOmilPs6cH3RSUk1hLYLGOhfwOGpI6njnVbVNy+pts7/P0cqK7RdQNZZIQ3OEIlGHSWl\njNTUTFoWo0c8wKspV/ONxmNSR0kpI13RS0tC2vPGrlOPsfAbs9eNSLso0EtLQlIWo6uGWHL8m2aN\nLTn+TfOSy47VJ16brKSMFOilJSEpi2s+9y1+tP/ns8Z+tP/nXPO5b7V1bhAvd65eN1JGytFLS0Ly\n7WkdKBuNF5E2t0bjRWiTlZRNqa/otRknHqUsRMqrtFf049sn+dh9O5g+UuueMHnoMB+7bwegeub5\nELMvzML+BakVNgu7oI5epIxKG+g/8ZWdR4N83fQR5xNf2alAP0/ypiyWn/TmY3L09fFG/vQDZ/JH\n9z7DzF/vAquNi0h+pQ30P39tOte4xPez//tarvE6dZUUaa/SBnqJL++GqZdfnco1PlPeTw99Zkyn\nnEzSZ92wn1aksxTopSXd3uMlLcg3GhfpJaVd3cpqcNUNja+qKKROfXCgP9d4EUMZZZ5Z4yK9pLRR\n8YrVS3KNSzEhG6YuPevkXONFqPxTJFtpA/3jPzyQa1yKCenxEvN3pB2rItkK5ejN7KPAP6N2LuxO\nageNnAzcA5wAPA18yN0bl1kEUBfBuMbWrpi1bwGgb4E1vGKO/TvSjlWRdMFX9GY2BPwbYMTdzwD6\ngKuAPwM+4+7LgZeB69ox0bnURTCuiRcOpu5bmHghu52Bfkci3aFo6uY4YMDMjgMWAi8BFwL3J7ff\nDYwWfI5UysnGtfmpPbnGQb8jkW4RnLpx90kz+zS1M2MPA18HtgGH3P315G57gXn5LK1NNXGFlC/q\ndyTSHYIDvZkdD1wOnAYcAu4DLkm5a2okMLP1wHqA4eHhoDkoJxtP6IYk/Y5EOq9I6uafAD9x9wPu\nPgU8CPwDYDBJ5QAsAfalPdjdN7n7iLuPLF68uMA0JIar370017iIdI8iVTcvAmvMbCG11M1FwATw\nOPBBapU364CvFp2ktF/edgYjp57A5m/vOabqZuTUE9r6PCLSfsFX9O7+FLVF16eplVYuADYBHwf+\nyMyeBX4VuKMN85Q2Cjl2b+Mju1OrbhrtjI11vJ+INFaojt7dNwAb5gw/B5xb5Oe2SleLYRq1M8h6\n/UJq4kOeR0Tar7RNzbq9yVY3Cwnaiwb6OXT42K6Tixr0rdGmNpHuUNoWCLEOg66ikI1MU9PHnvjU\naDz0eep0TKRI+5Q20OtqMVzIRqaQg15CN0wpty/SXqUN9NpeH2501RDnDC+aNXbO8KK2p7xCG43p\n05pIe5U2Rz+2dsWsHD1oe32rbhrfyZM/nt2j5skfH+Sm8Z3cOroy9TGDGTn6Zr3lQzZM6dOaSHuV\n9op+dNUQV6weOrozs8+MK1ZrF2YrQvrWxOwtr09rIu1V2kA/vn2SB7ZNHt2WP+3OA9smlcdtQUjf\nmpi95dUMTaS9ShvolceNK2Y6
RYeIiLRXaXP0oYFHm6zCnDI4wGTKaztf6RQ1QxNpn9Je0YfkcVW2\nV5PVcbJRJ8qxtSuO+WNZkIyLSHcrbaAPyeMq3VMT0oly4oWDzN0adSQZF5HuVtpAH5LHVdleza2j\nK7l2zfCsiqVr1wxnllZCWKWOiHSH0uboIX8eN3aeOZYY6w4hlToi0h1Ke0Ufoople+PbJxm7b8es\ndYex+3Y0XHe4aXwnX9z64qzS1C9ufZGbxndGmrWIxNRTgb6KZXs3b9nF1Jw+8VNHnJu37Mp8zN88\n9WKucREpt0KpGzMbBD4PnEHtbNjfBXYDXwaWAc8Dv+3uLxeaZRtVrWwvrS1Bo3GAIxnZlqxxESm3\nolf0nwX+q7u/EzgL+AFwA/Couy8HHk2+FxGRDgkO9Gb2FuA3SY4KdPfX3P0QcDlwd3K3u4HRopOU\nbMcvTG8qljXe7c8jIu1X5Ir+7cAB4D+Z2XYz+7yZvRl4q7u/BJD8e1Ib5ikZ3ndmelOxrHGABRn7\norLGATa8//Rjbl9gtXER6W5FAv1xwDnA7e6+Cvg5OdI0ZrbezCbMbOLAgfY3xuoVIc3G/v7bT8g1\nXtc3J9LP/V5EulORQL8X2OvuTyXf308t8P/UzE4GSP7dn/Zgd9/k7iPuPrJ48eIC0+htafsCGo0D\nPP+/02/LGofaruKp6TnVPdPec7uKRcooONC7+98Ce8ysXoR+EfB9YAuwLhlbB3y10AyloZC+NSE7\nhLWrWKS8iu6M/X3gS2b2BuA54CPU3jzuNbPrgBeBKws+hzQQsmN1cGE/L7+aclpUg4XVqu4qFukF\nhQK9uz8DjKTcdFGRnyut6zNLDeqNruiz3gMadTPQ0Y0i5VXqXjcSdkX/SsZmqqxx4OgmM/XyFymf\nngv0VTt4ZCgjpTLUIKUSmoap2q5ikV7RU71uQhqAdbuQRm1VbO4mItl6KtCHNADrdqOrhrhi9dCs\n3vJXrG585V3F5m4ikq2nUjchDcC63fj2Sb78nT2zWg5/+Tt7GDn1hKbBXoFdpDf0VKAP1c15/Vse\n2pW6kemWh3Z1zRxFpLN6KtAvsPRWvI128tcPFK+XFdYPFAe6IpCm1cM3GheR3tNTOfqQPuw6UFxE\nyq6nruhDShFDt/7HSvcMDvSnrjEMDqh9sIjU9NQVfUhZYVZbgEbtAurpnpllnDc+uHNeyjhvvux0\n+ufknvoXGDdfpvbBIlLTU4E+pBTx/81J2zQbh7jpntFVQ2y88qxZpZIbrzyrK9YPRKQ79FTqZnz7\nJA9sm5xVivjAtsmGpYiHp47kGoew1sFFqFRSRBrpqSt6LayKSC/qqUAfsrCqs1JFpOx6KtCHLKxu\neP/p9PfNWezss4ZnpYYcBlLE+PZJzrvtMU674Wucd9tjpe7dIyLt11OBPqQP++iqITZ+cM5i5wcb\nL3Ze/e6lucaLiFnhIyLlVHgx1sz6gAlg0t0vNbPTgHuAE4CngQ+5+2tFn6cdQvqwQ/7FzltHVwKw\n+alaD5o+M65+99Kj4+3UaN1BC7QiAu2puvkD4AfAW5Lv/wz4jLvfY2b/EbgOuL0Nz1PYoozNRYvm\nYXPRraMr5yWwz6WzXEWkmUKpGzNbArwP+HzyvQEXAvcnd7kbGC3yHO2UlSKfj9R5rLx51mEhOstV\nROqK5uj/CvhjoF5U/qvAIXd/Pfl+L5CaPzCz9WY2YWYTBw4cKDiN1hzKaPSVNR4qZt5ch4iISDPB\ngd7MLgX2u/u2mcMpd01d6nT3Te4+4u4jixcvDp1GLrGufmPvjM2721dEekuRK/rzgMvM7Hlqi68X\nUrvCHzSzeu5/CbCv0Azb6IJ3pr+hZI2HirkzNmu3r6puRKQuONC7+43uvsTdlwFXAY+5+zXA48AH\nk7utA75aeJZt8vgP01NEWeOhYtbRa7eviDQzH71uPg7cY2a3AtuBO+bhOYLEutKezijMzxqvC2
lt\nrKobEWmmLYHe3b8JfDP5+jng3Hb83HbrM0sNtu2+0g7pex96ktXgwv7U06Qa7fYVkd7SUztjQ6+0\n8wqphAlNwYTs9hWR3tJTgT5W7jykEiY0BRO621dEekdPBfpYV/QhlTChpZ/aMCUizfRUoM/KkTfK\nnYcIScOEbnzShikRaaanAn2soBiShhldNcSnPrByVpfMT31gZdOqm9DHiUjv6KmjBOvBL28JY16n\nZFTdNEunhB4JqKMERaSRngr0EBYU89a3j61dMatUEpROEZHO6blAn1dIfXusTw4iIq1QoG8i9GAP\npVNEpFv01GJsCLUYEJGyU6BvQnXqIlJ2CvRNqE5dRMpOOfomtLAqImWnQN9FQtoUi4g0o0DfRGj7\n4G59HhHpPUXOjF1qZo+b2Q/MbJeZ/UEyfoKZfcPMfpT8e3z7phtfrBOcdFKUiMyXIouxrwMfc/df\nB9YA15vZu4AbgEfdfTnwaPJ9acUqr1QZp4jMlyJnxr7k7k8nX/8d8ANgCLgcuDu5293AaNFJdlKs\n8kqVcYrIfGlLeaWZLQNWAU8Bb3X3l6D2ZgCc1I7n6JRY5ZUq4xSR+VJ4MdbM/h7wAPCH7v5/rMXT\nmsxsPbAeYHh4uOg05k2s8kqVcYrIfDEvcLqSmfUD/xl4xN3/MhnbDZzv7i+Z2cnAN9294WXpyMiI\nT0xMBM9DRKQXmdk2dx9pdr8iVTcG3AH8oB7kE1uAdcnX64Cvhj6HiIgUVyR1cx7wIWCnmT2TjP0J\ncBtwr5ldB7wIXFlsiiIiUkRwoHf3/wFkJeQvCv25IiLSXmpqJiJScQr0IiIVV6jqpm2TMDsAvFDg\nR5wI/KxN0ykzvQ41eh1q9DrUVPl1ONXdFze7U1cE+qLMbKKVEqOq0+tQo9ehRq9DjV4HpW5ERCpP\ngV5EpOKqEug3dXoCXUKvQ41ehxq9DjU9/zpUIkcvIiLZqnJFLyIiGUod6M3sYjPbbWbPmlmpDzgp\nwsyeN7OdZvaMmfVUdzgzu9PM9pvZ92aMVeqUs1ZkvA43m9lk8nfxjJm9t5NznG+9cupdiNIGejPr\nA/4DcAnwLuDq5ISrXnWBu5/dg2VkdwEXzxmr1ClnLbqLY18HgM8kfxdnu/vDkecUW0+ceheitIEe\nOBd41t2fc/fXgHuonW4lPcTdnwAOzhmu1Clnrch4HXpKr5x6F6LMgX4I2DPj+73JWC9y4Otmti05\n0KXXVeqUs4L+tZl9N0nt9EzKosqn3oUoc6BP65zZqyVE57n7OdTSWNeb2W92ekLSFW4H3gGcDbwE\n/EVnpxPH3FPvOj2fblDmQL8XWDrj+yXAvg7NpaPcfV/y737gK9TSWr3sp8npZiT/7u/wfDrC3X/q\n7tPufgT4HD3wd5GcevcA8CUULkqSAAAA00lEQVR3fzAZ7vm/hzIH+u8Ay83sNDN7A3AVtdOteoqZ\nvdnMfqX+NfAe4HuNH1V5OuWMo0Gt7reo+N+FTr3LVuoNU0m52F8BfcCd7v7JDk8pOjN7O7WreKgd\nJPM3vfQ6mNlm4HxqHQp/CmwAxoF7gWGSU87cvdILlRmvw/nU0jYOPA/8i3quuorM7B8C/x3YCRxJ\nhv+EWp6+p/4e5ip1oBcRkebKnLoREZEWKNCLiFScAr2ISMUp0IuIVJwCvYhIxSnQi4hUnAK9iEjF\nKdCLiFTc/weEVXruHt7hPQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x21fd1e68a90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatter the training set: column 0 on the x axis, column 1 on the y axis.\n",
    "# .A exposes each matrix column as a plain ndarray for matplotlib.\n",
    "plt.scatter(trainMat[:,0].A, trainMat[:,1].A)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 回归树预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.96408523182221406"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Regression tree: constant-valued leaves, at least 20 rows on each side of a split.\n",
    "myTree = createTree(trainMat, leafType=regLeaf, errType=regErr, ops=(1, 20))\n",
    "yHat = createForeCast(myTree, testMat[:, 0], modelEval=regTreeEval)\n",
    "# Correlation coefficient between the predictions and the test targets.\n",
    "corrcoef(yHat, testMat[:, 1], rowvar=False)[0, 1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型树预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9760412191380593"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Model tree: each leaf holds a linear model, at least 20 rows on each side of a split.\n",
    "myTree = createTree(trainMat, leafType=modelLeaf, errType=modelErr, ops=(1, 20))\n",
    "yHat = createForeCast(myTree, testMat[:, 0], modelEval=modelTreeEval)\n",
    "# Correlation coefficient between the predictions and the test targets.\n",
    "corrcoef(yHat, testMat[:, 1], rowvar=False)[0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
